[IA64] TLB tracking
author awilliam@xenbuild.aw <awilliam@xenbuild.aw>
Sat, 14 Oct 2006 23:42:00 +0000 (17:42 -0600)
committer awilliam@xenbuild.aw <awilliam@xenbuild.aw>
Sat, 14 Oct 2006 23:42:00 +0000 (17:42 -0600)
Add tlb insert tracking to flush finer grained virtual address
range when a page is unmapped from a domain.
This functionality is enabled with a compile time option,
xen_ia64_tlb_track (default is y) and xen_ia64_tlb_track_cnt (default is n).

This patch focuses on grant table mapping.
When page is unmapped, full vTLB flush is necessary.
By tracking tlb insert on grant mapped page, full vTLB flush
can be avoided.
Especially when the vbd backend does only DMA, dom0 doesn't insert a tlb
entry on the grant mapped page. In such a case no vTLB flush is needed at all.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
18 files changed:
xen/arch/ia64/Rules.mk
xen/arch/ia64/xen/Makefile
xen/arch/ia64/xen/domain.c
xen/arch/ia64/xen/faults.c
xen/arch/ia64/xen/mm.c
xen/arch/ia64/xen/tlb_track.c [new file with mode: 0644]
xen/arch/ia64/xen/vcpu.c
xen/arch/ia64/xen/vhpt.c
xen/include/asm-ia64/domain.h
xen/include/asm-ia64/linux-xen/asm/pgtable.h
xen/include/asm-ia64/p2m_entry.h [new file with mode: 0644]
xen/include/asm-ia64/perfc_defn.h
xen/include/asm-ia64/tlb_track.h [new file with mode: 0644]
xen/include/asm-ia64/tlbflush.h
xen/include/asm-ia64/vcpu.h
xen/include/asm-ia64/vcpumask.h [new file with mode: 0644]
xen/include/asm-ia64/vhpt.h
xen/include/public/arch-ia64.h

index 9d943ffe313c30d31c9a5a689a3e09602a17d064..ad7e3c54b6735ad9a5649e404a59dd47b76365ee 100644 (file)
@@ -7,6 +7,8 @@ VALIDATE_VT     ?= n
 no_warns ?= n
 xen_ia64_expose_p2m    ?= y
 xen_ia64_pervcpu_vhpt  ?= y
+xen_ia64_tlb_track     ?= y
+xen_ia64_tlb_track_cnt ?= n
 
 ifneq ($(COMPILE_ARCH),$(TARGET_ARCH))
 CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux-
@@ -44,6 +46,12 @@ endif
 ifeq ($(xen_ia64_pervcpu_vhpt),y)
 CFLAGS += -DCONFIG_XEN_IA64_PERVCPU_VHPT
 endif
+ifeq ($(xen_ia64_tlb_track),y)
+CFLAGS += -DCONFIG_XEN_IA64_TLB_TRACK
+endif
+ifeq ($(xen_ia64_tlb_track_cnt),y)
+CFLAGS += -DCONFIG_TLB_TRACK_CNT
+endif
 ifeq ($(no_warns),y)
 CFLAGS += -Wa,--fatal-warnings -Werror -Wno-uninitialized
 endif
index c5d26ded56c2bb5d8a4573b4a84dd18cac2465c9..428bd71566b2882407a65e664aa9c360de03c024 100644 (file)
@@ -29,3 +29,4 @@ obj-y += xenpatch.o
 obj-y += xencomm.o
 
 obj-$(crash_debug) += gdbstub.o
+obj-$(xen_ia64_tlb_track) += tlb_track.o
index 4414e0ef45237bbc2dff914358cec0218171e968..38940c2d5643840fe921a690a3d2023113da9893 100644 (file)
@@ -47,6 +47,7 @@
 #include <asm/dom_fw.h>
 #include <asm/shadow.h>
 #include <xen/guest_access.h>
+#include <asm/tlb_track.h>
 
 unsigned long dom0_size = 512*1024*1024;
 unsigned long dom0_align = 64*1024*1024;
@@ -390,6 +391,8 @@ int arch_domain_create(struct domain *d)
        DPRINTK("%s:%d domain %d pervcpu_vhpt %d\n",
                __func__, __LINE__, d->domain_id, d->arch.has_pervcpu_vhpt);
 #endif
+       if (tlb_track_create(d) < 0)
+               goto fail_nomem1;
        d->shared_info = alloc_xenheap_pages(get_order_from_shift(XSI_SHIFT));
        if (d->shared_info == NULL)
            goto fail_nomem;
@@ -418,6 +421,8 @@ int arch_domain_create(struct domain *d)
        return 0;
 
 fail_nomem:
+       tlb_track_destroy(d);
+fail_nomem1:
        if (d->arch.mm.pgd != NULL)
            pgd_free(d->arch.mm.pgd);
        if (d->shared_info != NULL)
@@ -433,6 +438,8 @@ void arch_domain_destroy(struct domain *d)
        if (d->arch.shadow_bitmap != NULL)
                xfree(d->arch.shadow_bitmap);
 
+       tlb_track_destroy(d);
+
        /* Clear vTLB for the next domain.  */
        domain_flush_tlb_vhpt(d);
 
index 13e92c9493b12d11a36db56824d2a4132519efe7..75be0088e73ec33aa9f8194f93fbabc9aed63819 100644 (file)
@@ -31,6 +31,7 @@
 #include <asm/asm-xsi-offsets.h>
 #include <asm/shadow.h>
 #include <asm/uaccess.h>
+#include <asm/p2m_entry.h>
 
 extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
 /* FIXME: where these declarations shold be there ? */
@@ -202,8 +203,11 @@ void ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_reg
        fault = vcpu_translate(current,address,is_data,&pteval,&itir,&iha);
        if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) {
                struct p2m_entry entry;
-               pteval = translate_domain_pte(pteval, address, itir, &logps, &entry);
-               vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,logps);
+               unsigned long m_pteval;
+               m_pteval = translate_domain_pte(pteval, address, itir,
+                                               &logps, &entry);
+               vcpu_itc_no_srlz(current, (is_data? 2: 1) | 4, 
+                                address, m_pteval, pteval, logps, &entry);
                if ((fault == IA64_USE_TLB && !current->arch.dtlb.pte.p) ||
                    p2m_entry_retry(&entry)) {
                        /* dtlb has been purged in-between.  This dtlb was
index 07b50c38207c644590bb737f4cd9ad189354d3a8..ec2b1da969f58aeb1bc17406a2b8bace36cfa33a 100644 (file)
 #include <asm/vhpt.h>
 #include <asm/vcpu.h>
 #include <asm/shadow.h>
+#include <asm/p2m_entry.h>
+#include <asm/tlb_track.h>
 #include <linux/efi.h>
 #include <xen/guest_access.h>
 #include <asm/page.h>
 #include <public/memory.h>
 
 static void domain_page_flush(struct domain* d, unsigned long mpaddr,
-                              unsigned long old_mfn, unsigned long new_mfn);
+                              volatile pte_t* ptep, pte_t old_pte);
 
 extern unsigned long ia64_iobase;
 
@@ -798,12 +800,15 @@ flags_to_prot (unsigned long flags)
 
     res |= flags & ASSIGN_readonly ? _PAGE_AR_R: _PAGE_AR_RWX;
     res |= flags & ASSIGN_nocache ? _PAGE_MA_UC: _PAGE_MA_WB;
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+    res |= flags & ASSIGN_tlb_track ? _PAGE_TLB_TRACKING: 0;
+#endif
     
     return res;
 }
 
 /* map a physical address to the specified metaphysical addr */
-// flags: currently only ASSIGN_readonly, ASSIGN_nocache
+// flags: currently only ASSIGN_readonly, ASSIGN_nocache, ASSIGN_tlb_track
 // This is called by assign_domain_mmio_page().
 // So accessing to pte is racy.
 int
@@ -1034,7 +1039,7 @@ assign_domain_mach_page(struct domain *d,
 // caller must call set_gpfn_from_mfn() before call if necessary.
 // because set_gpfn_from_mfn() result must be visible before pte xchg
 // caller must use memory barrier. NOTE: xchg has acquire semantics.
-// flags: currently only ASSIGN_readonly
+// flags: ASSIGN_xxx
 static void
 assign_domain_page_replace(struct domain *d, unsigned long mpaddr,
                            unsigned long mfn, unsigned long flags)
@@ -1068,7 +1073,7 @@ assign_domain_page_replace(struct domain *d, unsigned long mpaddr,
                 set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
             }
 
-            domain_page_flush(d, mpaddr, old_mfn, mfn);
+            domain_page_flush(d, mpaddr, pte, old_pte);
 
             try_to_clear_PGC_allocate(d, old_page);
             put_page(old_page);
@@ -1088,7 +1093,7 @@ assign_domain_page_cmpxchg_rel(struct domain* d, unsigned long mpaddr,
     struct mm_struct *mm = &d->arch.mm;
     volatile pte_t* pte;
     unsigned long old_mfn;
-    unsigned long old_arflags;
+    unsigned long old_prot;
     pte_t old_pte;
     unsigned long new_mfn;
     unsigned long new_prot;
@@ -1098,12 +1103,12 @@ assign_domain_page_cmpxchg_rel(struct domain* d, unsigned long mpaddr,
     pte = lookup_alloc_domain_pte(d, mpaddr);
 
  again:
-    old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;
+    old_prot = pte_val(*pte) & ~_PAGE_PPN_MASK;
     old_mfn = page_to_mfn(old_page);
-    old_pte = pfn_pte(old_mfn, __pgprot(old_arflags));
+    old_pte = pfn_pte(old_mfn, __pgprot(old_prot));
     if (!pte_present(old_pte)) {
-        DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx\n",
-                __func__, pte_val(old_pte), old_arflags, old_mfn);
+        DPRINTK("%s: old_pte 0x%lx old_prot 0x%lx old_mfn 0x%lx\n",
+                __func__, pte_val(old_pte), old_prot, old_mfn);
         return -EINVAL;
     }
 
@@ -1118,10 +1123,10 @@ assign_domain_page_cmpxchg_rel(struct domain* d, unsigned long mpaddr,
             goto again;
         }
 
-        DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx "
+        DPRINTK("%s: old_pte 0x%lx old_prot 0x%lx old_mfn 0x%lx "
                 "ret_pte 0x%lx ret_mfn 0x%lx\n",
                 __func__,
-                pte_val(old_pte), old_arflags, old_mfn,
+                pte_val(old_pte), old_prot, old_mfn,
                 pte_val(ret_pte), pte_pfn(ret_pte));
         return -EINVAL;
     }
@@ -1133,7 +1138,7 @@ assign_domain_page_cmpxchg_rel(struct domain* d, unsigned long mpaddr,
 
     set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
 
-    domain_page_flush(d, mpaddr, old_mfn, new_mfn);
+    domain_page_flush(d, mpaddr, pte, old_pte);
     put_page(old_page);
     perfc_incrc(assign_domain_pge_cmpxchg_rel);
     return 0;
@@ -1202,7 +1207,7 @@ zap_domain_page_one(struct domain *d, unsigned long mpaddr, unsigned long mfn)
         set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
     }
 
-    domain_page_flush(d, mpaddr, mfn, INVALID_MFN);
+    domain_page_flush(d, mpaddr, pte, old_pte);
 
     if (page_get_owner(page) != NULL) {
         try_to_clear_PGC_allocate(d, page);
@@ -1417,8 +1422,12 @@ create_grant_host_mapping(unsigned long gpaddr,
     BUG_ON(ret == 0);
     BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
            get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
-    assign_domain_page_replace(d, gpaddr, mfn, (flags & GNTMAP_readonly)?
-                                              ASSIGN_readonly: ASSIGN_writable);
+    assign_domain_page_replace(d, gpaddr, mfn,
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+                               ASSIGN_tlb_track |
+#endif
+                               ((flags & GNTMAP_readonly) ?
+                                ASSIGN_readonly : ASSIGN_writable));
     perfc_incrc(create_grant_host_mapping);
     return GNTST_okay;
 }
@@ -1473,7 +1482,7 @@ destroy_grant_host_mapping(unsigned long gpaddr,
     }
     BUG_ON(pte_pfn(old_pte) != mfn);
 
-    domain_page_flush(d, gpaddr, mfn, INVALID_MFN);
+    domain_page_flush(d, gpaddr, pte, old_pte);
 
     page = mfn_to_page(mfn);
     BUG_ON(page_get_owner(page) == d);//try_to_clear_PGC_allocate(d, page) is not needed.
@@ -1645,12 +1654,43 @@ guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
 //    flush finer range.
 static void
 domain_page_flush(struct domain* d, unsigned long mpaddr,
-                  unsigned long old_mfn, unsigned long new_mfn)
+                  volatile pte_t* ptep, pte_t old_pte)
 {
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+    struct tlb_track_entry* entry;
+#endif
+
     if (shadow_mode_enabled(d))
         shadow_mark_page_dirty(d, mpaddr >> PAGE_SHIFT);
 
+#ifndef CONFIG_XEN_IA64_TLB_TRACK
     domain_flush_vtlb_all();
+#else
+    switch (tlb_track_search_and_remove(d->arch.tlb_track,
+                                        ptep, old_pte, &entry)) {
+    case TLB_TRACK_NOT_TRACKED:
+        // DPRINTK("%s TLB_TRACK_NOT_TRACKED\n", __func__);
+        domain_flush_vtlb_all();
+        break;
+    case TLB_TRACK_NOT_FOUND:
+        /* do nothing */
+        // DPRINTK("%s TLB_TRACK_NOT_FOUND\n", __func__);
+        break;
+    case TLB_TRACK_FOUND:
+        // DPRINTK("%s TLB_TRACK_FOUND\n", __func__);
+        domain_flush_vtlb_track_entry(d, entry);
+        tlb_track_free_entry(d->arch.tlb_track, entry);
+        break;
+    case TLB_TRACK_MANY:
+        DPRINTK("%s TLB_TRACK_MANY\n", __func__);
+        domain_flush_vtlb_all();
+        break;
+    case TLB_TRACK_AGAIN:
+        DPRINTK("%s TLB_TRACK_AGAIN\n", __func__);
+        BUG();
+        break;
+    }
+#endif
     perfc_incrc(domain_page_flush);
 }
 
diff --git a/xen/arch/ia64/xen/tlb_track.c b/xen/arch/ia64/xen/tlb_track.c
new file mode 100644 (file)
index 0000000..49a8a79
--- /dev/null
@@ -0,0 +1,506 @@
+/******************************************************************************
+ * tlb_track.c
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <asm/tlb_track.h>
+#include <asm/p2m_entry.h>
+#include <asm/vmx_mm_def.h>  /* for IA64_RR_SHIFT */
+#include <asm/vmx_vcpu.h>    /* for VRN7 */
+#include <asm/vcpu.h>        /* for PSCB() */
+
+#define CONFIG_TLB_TRACK_DEBUG
+#ifdef CONFIG_TLB_TRACK_DEBUG
+# define tlb_track_printd(fmt, ...)     \
+    printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__)
+#else
+# define tlb_track_printd(fmt, ...)     do { } while (0)
+#endif
+
+static int
+tlb_track_allocate_entries(struct tlb_track* tlb_track)
+{
+    struct page_info* entry_page;
+    struct tlb_track_entry* track_entries;
+    unsigned int allocated;
+    unsigned long i;
+
+    BUG_ON(tlb_track->num_free > 0);
+    if (tlb_track->num_entries >= tlb_track->limit) {
+        DPRINTK("%s: num_entries %d limit %d\n",
+                __func__, tlb_track->num_entries, tlb_track->limit);
+        return -ENOMEM;
+    }
+    entry_page = alloc_domheap_page(NULL);
+    if (entry_page == NULL) {
+        DPRINTK("%s: domheap page failed. num_entries %d limit %d\n",
+                __func__, tlb_track->num_entries, tlb_track->limit);
+        return -ENOMEM;
+    }
+
+    list_add(&entry_page->list, &tlb_track->page_list);
+    track_entries = (struct tlb_track_entry*)page_to_virt(entry_page);
+    allocated = PAGE_SIZE / sizeof(track_entries[0]);
+    tlb_track->num_entries += allocated;
+    tlb_track->num_free += allocated;
+    for (i = 0; i < allocated; i++) {
+        list_add(&track_entries[i].list, &tlb_track->free_list);
+        // tlb_track_printd("track_entries[%ld] 0x%p\n", i, &track_entries[i]);
+    }
+    tlb_track_printd("allocated %d num_entries %d num_free %d\n",
+                     allocated, tlb_track->num_entries, tlb_track->num_free);
+    return 0;
+}
+
+
+int
+tlb_track_create(struct domain* d)
+{
+    struct tlb_track* tlb_track = NULL;
+    struct page_info* hash_page = NULL;
+    unsigned int hash_size;
+    unsigned int hash_shift;
+    unsigned int i;
+
+    tlb_track = xmalloc(struct tlb_track);
+    if (tlb_track == NULL)
+        goto out;
+
+    hash_page = alloc_domheap_page(NULL);
+    if (hash_page == NULL)
+        goto out;
+
+    spin_lock_init(&tlb_track->free_list_lock);
+    INIT_LIST_HEAD(&tlb_track->free_list);
+    tlb_track->limit = TLB_TRACK_LIMIT_ENTRIES;
+    tlb_track->num_entries = 0;
+    tlb_track->num_free = 0;
+    INIT_LIST_HEAD(&tlb_track->page_list);
+    if (tlb_track_allocate_entries(tlb_track) < 0)
+        goto out;
+
+    spin_lock_init(&tlb_track->hash_lock);
+    /* XXX hash size optimization */
+    hash_size = PAGE_SIZE / sizeof(tlb_track->hash[0]);
+    for (hash_shift = 0; (1 << (hash_shift + 1)) < hash_size; hash_shift++)
+        /* nothing */;
+    tlb_track->hash_size = (1 << hash_shift);
+    tlb_track->hash_shift = hash_shift;
+    tlb_track->hash_mask = (1 << hash_shift) - 1;
+    tlb_track->hash = page_to_virt(hash_page);
+    for (i = 0; i < tlb_track->hash_size; i++)
+        INIT_LIST_HEAD(&tlb_track->hash[i]);
+
+    smp_mb(); /* make initialization visible before use. */
+    d->arch.tlb_track = tlb_track;
+    printk("%s:%d hash 0x%p hash_size %d \n",
+           __func__, __LINE__, tlb_track->hash, tlb_track->hash_size);
+
+    return 0;
+
+out:
+    if (hash_page != NULL)
+        free_domheap_page(hash_page);
+
+    if (tlb_track != NULL)
+        xfree(tlb_track);
+
+    return -ENOMEM;
+}
+
+void
+tlb_track_destroy(struct domain* d)
+{
+    struct tlb_track* tlb_track = d->arch.tlb_track;
+    struct page_info* page;
+    struct page_info* next;
+
+    spin_lock(&tlb_track->free_list_lock);
+    BUG_ON(tlb_track->num_free != tlb_track->num_entries);
+
+    list_for_each_entry_safe(page, next, &tlb_track->page_list, list) {
+        list_del(&page->list);
+        free_domheap_page(page);
+    }
+
+    free_domheap_page(virt_to_page(tlb_track->hash));
+    xfree(tlb_track);
+    // d->tlb_track = NULL;
+}
+
+static struct tlb_track_entry*
+tlb_track_get_entry(struct tlb_track* tlb_track)
+{
+    struct tlb_track_entry* entry = NULL;
+    spin_lock(&tlb_track->free_list_lock);
+    if (tlb_track->num_free == 0)
+        (void)tlb_track_allocate_entries(tlb_track);
+
+    if (tlb_track->num_free > 0) {
+        BUG_ON(list_empty(&tlb_track->free_list));
+        entry = list_entry(tlb_track->free_list.next,
+                           struct tlb_track_entry, list);
+        tlb_track->num_free--;
+        list_del(&entry->list);
+    }
+    spin_unlock(&tlb_track->free_list_lock);
+    return entry;
+}
+
+void
+tlb_track_free_entry(struct tlb_track* tlb_track,
+                     struct tlb_track_entry* entry)
+{
+    spin_lock(&tlb_track->free_list_lock);
+    list_add(&entry->list, &tlb_track->free_list);
+    tlb_track->num_free++;
+    spin_unlock(&tlb_track->free_list_lock);
+}
+
+
+#include <linux/hash.h>
+/* XXX hash function. */
+static struct list_head*
+tlb_track_hash_head(struct tlb_track* tlb_track, volatile pte_t* ptep)
+{
+    unsigned long hash = hash_long((unsigned long)ptep, tlb_track->hash_shift);
+    BUG_ON(hash >= tlb_track->hash_size);
+    BUG_ON((hash & tlb_track->hash_mask) != hash);
+    return &tlb_track->hash[hash];
+}
+
+static int
+tlb_track_pte_zapped(pte_t old_pte, pte_t ret_pte)
+{
+    if (pte_pfn(old_pte) != pte_pfn(ret_pte) ||
+        (pte_val(old_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK)) !=
+        (pte_val(ret_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK))) {
+        /* Other thread zapped the p2m entry. */
+        return 1;
+    }
+    return 0;
+}
+
+static TLB_TRACK_RET_T
+tlb_track_insert_or_dirty(struct tlb_track* tlb_track, struct mm_struct* mm,
+                          volatile pte_t* ptep, pte_t old_pte,
+                          unsigned long vaddr, unsigned long rid)
+{
+    unsigned long mfn = pte_pfn(old_pte);
+    struct list_head* head = tlb_track_hash_head(tlb_track, ptep);
+    struct tlb_track_entry* entry;
+    struct tlb_track_entry* new_entry = NULL;
+    unsigned long bit_to_be_set = _PAGE_TLB_INSERTED;
+    pte_t new_pte;
+    pte_t ret_pte;
+
+    struct vcpu* v = current;
+    TLB_TRACK_RET_T ret = TLB_TRACK_NOT_FOUND;
+
+#if 0 /* this is done at vcpu_tlb_track_insert_or_dirty() */
+    perfc_incrc(tlb_track_iod);
+    if (!pte_tlb_tracking(old_pte)) {
+        perfc_incrc(tlb_track_iod_not_tracked);
+        return TLB_TRACK_NOT_TRACKED;
+    }
+#endif
+    if (pte_tlb_inserted_many(old_pte)) {
+        perfc_incrc(tlb_track_iod_tracked_many);
+        return TLB_TRACK_MANY;
+    }
+
+    /* vaddr must be normalized so that it is in vrn7 and page aligned. */
+    BUG_ON((vaddr >> IA64_RR_SHIFT) != VRN7);
+    BUG_ON((vaddr & ~PAGE_MASK) != 0);
+#if 0
+    tlb_track_printd("\n"
+                     "\tmfn 0x%016lx\n"
+                     "\told_pte 0x%016lx ptep 0x%p\n"
+                     "\tptep_val 0x%016lx vaddr 0x%016lx rid %ld\n"
+                     "\ttlb_track 0x%p head 0x%p\n",
+                     mfn,
+                     pte_val(old_pte), ptep, pte_val(*ptep),
+                     vaddr, rid,
+                     tlb_track, head);
+#endif
+
+ again:
+    /*
+     * zapping side may zap the p2m entry and then remove tlb track entry
+     * non-atomically. We may see the stale tlb track entry here.
+     * p2m_entry_retry() handles such a case.
+     * Or other thread may zap the p2m entry and remove tlb track entry
+     * and inserted new tlb track entry.
+     */
+    spin_lock(&tlb_track->hash_lock);
+    list_for_each_entry(entry, head, list) {
+        if (entry->ptep != ptep)
+            continue;
+
+        if (pte_pfn(entry->pte_val) == mfn) {
+            // tlb_track_entry_printf(entry);
+            if (entry->vaddr == vaddr && entry->rid == rid) {
+                // tlb_track_printd("TLB_TRACK_FOUND\n");
+                ret = TLB_TRACK_FOUND;
+                perfc_incrc(tlb_track_iod_found);
+#ifdef CONFIG_TLB_TRACK_CNT
+                entry->cnt++;
+                if (entry->cnt > TLB_TRACK_CNT_FORCE_MANY) {
+                    /*
+                     * heuristics:
+                     * If a page is used to transfer data by dev channel,
+                     * it would be unmapped with small amount access
+                     * (once or twice tlb insert) after real device
+                     * I/O completion. It would be short period.
+                     * However this page seems to be accessed many times.
+                     * We guess that this page is used I/O ring
+                     * so that tracking this entry might be useless.
+                     */
+                     // tlb_track_entry_printf(entry);
+                     // tlb_track_printd("cnt = %ld\n", entry->cnt);
+                    perfc_incrc(tlb_track_iod_force_many);
+                    goto force_many;
+                }
+#endif
+                goto found;
+            } else {
+#ifdef CONFIG_TLB_TRACK_CNT
+            force_many:
+#endif
+                if (!pte_tlb_inserted(old_pte)) {
+                    printk("%s:%d racy update\n", __func__, __LINE__);
+                    old_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED);
+                }
+                new_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED_MANY);
+                ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte);
+                if (pte_val(ret_pte) != pte_val(old_pte)) {
+                    // tlb_track_printd("TLB_TRACK_AGAIN\n");
+                    ret = TLB_TRACK_AGAIN;
+                    perfc_incrc(tlb_track_iod_again);
+                } else {
+                    // tlb_track_printd("TLB_TRACK_MANY del entry 0x%p\n",
+                    //                  entry);
+                    ret = TLB_TRACK_MANY;
+                    list_del(&entry->list);
+                    // tlb_track_entry_printf(entry);
+                    perfc_incrc(tlb_track_iod_tracked_many_del);
+                }
+                goto out;
+            }
+        }
+
+        /*
+         * Other thread changed the p2m entry and removed and inserted new
+     * tlb track entry after we get old_pte, but before we get
+         * spinlock.
+         */
+        // tlb_track_printd("TLB_TRACK_AGAIN\n");
+        ret = TLB_TRACK_AGAIN;
+        perfc_incrc(tlb_track_iod_again);
+        goto out;
+    }
+
+    entry = NULL; // prevent freeing entry.
+    if (pte_tlb_inserted(old_pte)) {
+        /* Other thread else removed the tlb_track_entry after we got old_pte
+           before we got spin lock. */
+        ret = TLB_TRACK_AGAIN;
+        perfc_incrc(tlb_track_iod_again);
+        goto out;
+    }
+    if (new_entry == NULL && bit_to_be_set == _PAGE_TLB_INSERTED) {
+        spin_unlock(&tlb_track->hash_lock);
+        new_entry = tlb_track_get_entry(tlb_track);
+        if (new_entry == NULL) {
+            tlb_track_printd("get_entry failed\n");
+            /* entry can't be allocated.
+               fall down into full flush mode. */
+            bit_to_be_set |= _PAGE_TLB_INSERTED_MANY;
+            perfc_incrc(tlb_track_iod_new_failed);
+        }
+        // tlb_track_printd("new_entry 0x%p\n", new_entry);
+        perfc_incrc(tlb_track_iod_new_entry);
+        goto again;
+    }
+
+    BUG_ON(pte_tlb_inserted_many(old_pte));
+    new_pte = __pte(pte_val(old_pte) | bit_to_be_set);
+    ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte);
+    if (pte_val(old_pte) != pte_val(ret_pte)) {
+        if (tlb_track_pte_zapped(old_pte, ret_pte)) {
+            // tlb_track_printd("zapped TLB_TRACK_AGAIN\n");
+            ret = TLB_TRACK_AGAIN;
+            perfc_incrc(tlb_track_iod_again);
+            goto out;
+        }
+
+        /* Other thread set _PAGE_TLB_INSERTED and/or _PAGE_TLB_INSERTED_MANY */
+        if (pte_tlb_inserted_many(ret_pte)) {
+            /* Other thread already set _PAGE_TLB_INSERTED_MANY and
+               removed the entry. */
+            // tlb_track_printd("inserted TLB_TRACK_MANY\n");
+            BUG_ON(!pte_tlb_inserted(ret_pte));
+            ret = TLB_TRACK_MANY;
+            perfc_incrc(tlb_track_iod_new_many);
+            goto out;
+        }
+        BUG_ON(pte_tlb_inserted(ret_pte));
+        BUG();
+    }
+    if (new_entry) {
+        // tlb_track_printd("inserting new_entry 0x%p\n", new_entry);
+        entry = new_entry;
+        new_entry = NULL;
+
+        entry->ptep = ptep;
+        entry->pte_val = old_pte;
+        entry->vaddr = vaddr;
+        entry->rid = rid;
+        cpus_clear(entry->pcpu_dirty_mask);
+        vcpus_clear(entry->vcpu_dirty_mask);
+        list_add(&entry->list, head);
+
+#ifdef CONFIG_TLB_TRACK_CNT
+        entry->cnt = 0;
+#endif
+        perfc_incrc(tlb_track_iod_insert);
+        // tlb_track_entry_printf(entry);
+    } else {
+        goto out;
+    }
+
+ found:
+    BUG_ON(v->processor >= NR_CPUS);
+    cpu_set(v->processor, entry->pcpu_dirty_mask);
+    BUG_ON(v->vcpu_id >= NR_CPUS);
+    vcpu_set(v->vcpu_id, entry->vcpu_dirty_mask);
+    perfc_incrc(tlb_track_iod_dirtied);
+
+ out:
+    spin_unlock(&tlb_track->hash_lock);
+    if (ret == TLB_TRACK_MANY && entry != NULL)
+        tlb_track_free_entry(tlb_track, entry);
+    if (new_entry != NULL)
+        tlb_track_free_entry(tlb_track, new_entry);
+    return ret;
+}
+
+void
+__vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr,
+                                 struct p2m_entry* entry)
+{
+    unsigned long vrn = vaddr >> IA64_RR_SHIFT;
+    unsigned long rid = PSCB(vcpu, rrs[vrn]);
+    TLB_TRACK_RET_T ret;
+
+    /* normalize vrn7
+       When linux dom0 case, vrn7 is the most common case. */
+    vaddr |= VRN7 << VRN_SHIFT;
+    vaddr &= PAGE_MASK;
+    ret = tlb_track_insert_or_dirty(vcpu->domain->arch.tlb_track,
+                                    &vcpu->domain->arch.mm,
+                                    entry->ptep, entry->used,
+                                    vaddr, rid);
+    if (ret == TLB_TRACK_AGAIN)
+        p2m_entry_set_retry(entry);
+}
+
+TLB_TRACK_RET_T
+tlb_track_search_and_remove(struct tlb_track* tlb_track,
+                            volatile pte_t* ptep, pte_t old_pte,
+                            struct tlb_track_entry** entryp)
+{
+    unsigned long mfn = pte_pfn(old_pte);
+    struct list_head* head = tlb_track_hash_head(tlb_track, ptep);
+    struct tlb_track_entry* entry;
+
+    perfc_incrc(tlb_track_sar);
+    if (!pte_tlb_tracking(old_pte)) {
+        perfc_incrc(tlb_track_sar_not_tracked);
+        return TLB_TRACK_NOT_TRACKED;
+    }
+    if (!pte_tlb_inserted(old_pte)) {
+        BUG_ON(pte_tlb_inserted_many(old_pte));
+        perfc_incrc(tlb_track_sar_not_found);
+        return TLB_TRACK_NOT_FOUND;
+    }
+    if (pte_tlb_inserted_many(old_pte)) {
+        BUG_ON(!pte_tlb_inserted(old_pte));
+        perfc_incrc(tlb_track_sar_many);
+        return TLB_TRACK_MANY;
+    }
+
+    spin_lock(&tlb_track->hash_lock);
+    list_for_each_entry(entry, head, list) {
+        if (entry->ptep != ptep)
+            continue;
+
+        if (pte_pfn(entry->pte_val) == mfn) {
+            list_del(&entry->list);
+            spin_unlock(&tlb_track->hash_lock);
+            *entryp = entry;
+            perfc_incrc(tlb_track_sar_found);
+            // tlb_track_entry_printf(entry);
+#ifdef CONFIG_TLB_TRACK_CNT
+            // tlb_track_printd("cnt = %ld\n", entry->cnt);
+#endif
+            return TLB_TRACK_FOUND;
+        }
+        BUG();
+    }
+    BUG();
+    spin_unlock(&tlb_track->hash_lock);
+    return TLB_TRACK_NOT_TRACKED;
+}
+
+/* for debug */
+void
+__tlb_track_entry_printf(const char* func, int line,
+                         const struct tlb_track_entry* entry)
+{
+    char pcpumask_buf[NR_CPUS + 1];
+    char vcpumask_buf[MAX_VIRT_CPUS + 1];
+    cpumask_scnprintf(pcpumask_buf, sizeof(pcpumask_buf),
+                      entry->pcpu_dirty_mask);
+    vcpumask_scnprintf(vcpumask_buf, sizeof(vcpumask_buf),
+                       entry->vcpu_dirty_mask);
+    printk("%s:%d\n"
+           "\tmfn 0x%016lx\n"
+           "\told_pte 0x%016lx ptep 0x%p\n"
+           "\tpte_val 0x%016lx vaddr 0x%016lx rid %ld\n"
+           "\tpcpu_dirty_mask %s vcpu_dirty_mask %s\n"
+           "\tentry 0x%p\n",
+           func, line,
+           pte_pfn(entry->pte_val),
+           pte_val(entry->pte_val), entry->ptep, pte_val(*entry->ptep),
+           entry->vaddr, entry->rid,
+           pcpumask_buf, vcpumask_buf,
+           entry);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
index c27d5184497c048b425de53850bc375c5c70f4cd..9b4e1033a2c819694b7bdf5551349c87b2dce8ab 100644 (file)
@@ -24,6 +24,8 @@
 #include <asm/bundle.h>
 #include <asm/privop_stat.h>
 #include <asm/uaccess.h>
+#include <asm/p2m_entry.h>
+#include <asm/tlb_track.h>
 
 /* FIXME: where these declarations should be there ? */
 extern void getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs);
@@ -2007,7 +2009,9 @@ IA64FAULT vcpu_set_dtr(VCPU *vcpu, u64 slot, u64 pte,
  VCPU translation cache access routines
 **************************************************************************/
 
-void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 mp_pte, UINT64 logps)
+void
+vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte,
+                 UINT64 mp_pte, UINT64 logps, struct p2m_entry* entry)
 {
        unsigned long psr;
        unsigned long ps = (vcpu->domain==dom0) ? logps : PAGE_SHIFT;
@@ -2020,6 +2024,7 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64
                              "smaller page size!\n");
 
        BUG_ON(logps > PAGE_SHIFT);
+       vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry);
        psr = ia64_clear_ic();
        ia64_itc(IorD,vaddr,pte,ps); // FIXME: look for bigger mappings
        ia64_set_psr(psr);
@@ -2037,7 +2042,7 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64
        // PAGE_SIZE mapping in the vhpt for now, else purging is complicated
        else vhpt_insert(vaddr,pte,PAGE_SHIFT<<2);
 #endif
-       if ((mp_pte == -1UL) || (IorD & 0x4)) // don't place in 1-entry TLB
+       if (IorD & 0x4) /* don't place in 1-entry TLB */
                return;
        if (IorD & 0x1) {
                vcpu_set_tr_entry(&PSCBX(vcpu,itlb),mp_pte,ps<<2,vaddr);
@@ -2062,7 +2067,7 @@ again:
        pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry);
        if (!pteval) return IA64_ILLOP_FAULT;
        if (swap_rr0) set_one_rr(0x0,PSCB(vcpu,rrs[0]));
-       vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps);
+       vcpu_itc_no_srlz(vcpu, 2, ifa, pteval, pte, logps, &entry);
        if (swap_rr0) set_metaphysical_rr0();
        if (p2m_entry_retry(&entry)) {
                vcpu_flush_tlb_vhpt_range(ifa, logps);
@@ -2085,7 +2090,7 @@ again:
        pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry);
        if (!pteval) return IA64_ILLOP_FAULT;
        if (swap_rr0) set_one_rr(0x0,PSCB(vcpu,rrs[0]));
-       vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps);
+       vcpu_itc_no_srlz(vcpu, 1, ifa, pteval, pte, logps, &entry);
        if (swap_rr0) set_metaphysical_rr0();
        if (p2m_entry_retry(&entry)) {
                vcpu_flush_tlb_vhpt_range(ifa, logps);
index a8220da1e148cd2bbbff77e2b50ddfeeb2526db3..5cd68f4a35ca5fddd18239f2538203f9d5bc4ebf 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/page.h>
 #include <asm/vhpt.h>
 #include <asm/vcpu.h>
+#include <asm/vcpumask.h>
 #include <asm/vmmu.h>
 
 /* Defined in tlb.c  */
@@ -42,12 +43,14 @@ void
 local_vhpt_flush(void)
 {
        __vhpt_flush(__ia64_per_cpu_var(vhpt_paddr));
+       perfc_incrc(local_vhpt_flush);
 }
 
 static void
 vcpu_vhpt_flush(struct vcpu* v)
 {
        __vhpt_flush(vcpu_vhpt_maddr(v));
+       perfc_incrc(vcpu_vhpt_flush);
 }
 
 static void
@@ -170,6 +173,39 @@ pervcpu_vhpt_free(struct vcpu *v)
 }
 #endif
 
+void
+domain_purge_swtc_entries(struct domain *d)
+{
+       struct vcpu* v;
+       for_each_vcpu(d, v) {
+               if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
+                       continue;
+
+               /* Purge TC entries.
+                  FIXME: clear only if match.  */
+               vcpu_purge_tr_entry(&PSCBX(v,dtlb));
+               vcpu_purge_tr_entry(&PSCBX(v,itlb));
+       }
+}
+
+void
+domain_purge_swtc_entries_vcpu_dirty_mask(struct domain* d,
+                                          vcpumask_t vcpu_dirty_mask)
+{
+       int vcpu;
+
+       for_each_vcpu_mask(vcpu, vcpu_dirty_mask) {
+               struct vcpu* v = d->vcpu[vcpu];
+               if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
+                       continue;
+
+               /* Purge TC entries.
+                  FIXME: clear only if match.  */
+               vcpu_purge_tr_entry(&PSCBX(v, dtlb));
+               vcpu_purge_tr_entry(&PSCBX(v, itlb));
+       }
+}
+
 // SMP: we can't assume v == current, vcpu might move to another physical cpu.
 // So memory barrier is necessary.
 // if we can guranttee that vcpu can run on only this physical cpu
@@ -292,15 +328,7 @@ void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range)
        }
 #endif
 
-       for_each_vcpu (d, v) {
-               if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
-                       continue;
-
-               /* Purge TC entries.
-                  FIXME: clear only if match.  */
-               vcpu_purge_tr_entry(&PSCBX(v,dtlb));
-               vcpu_purge_tr_entry(&PSCBX(v,itlb));
-       }
+       domain_purge_swtc_entries(d);
        smp_mb();
 
        for_each_vcpu (d, v) {
@@ -327,6 +355,83 @@ void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range)
        perfc_incrc(domain_flush_vtlb_range);
 }
 
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+#include <asm/tlb_track.h>
+#include <asm/vmx_vcpu.h>
+void
+__domain_flush_vtlb_track_entry(struct domain* d,
+                                const struct tlb_track_entry* entry)
+{
+       unsigned long rr7_rid;
+       int swap_rr0 = 0;
+       unsigned long old_rid;
+       unsigned long vaddr = entry->vaddr;
+       struct vcpu* v;
+       int cpu;
+       int vcpu;
+
+       BUG_ON((vaddr >> VRN_SHIFT) != VRN7);
+       /*
+        * heuristic:
+        * dom0linux accesses grant mapped pages via the kernel
+        * straight mapped area and it doesn't change rr7 rid. 
+        * So it is likely that rr7 == entry->rid so that
+        * we can avoid rid change.
+        * When blktap is supported, this heuristic should be revised.
+        */
+       vcpu_get_rr(current, VRN7 << VRN_SHIFT, &rr7_rid);
+       if (likely(rr7_rid == entry->rid)) {
+               perfc_incrc(tlb_track_use_rr7);
+       } else {
+               swap_rr0 = 1;
+               vaddr = (vaddr << 3) >> 3;// force vrn0
+               perfc_incrc(tlb_track_swap_rr0);
+       }
+
+       // tlb_track_entry_printf(entry);
+       if (swap_rr0) {
+               vcpu_get_rr(current, 0, &old_rid);
+               vcpu_set_rr(current, 0, entry->rid);
+       }
+    
+       if (HAS_PERVCPU_VHPT(d)) {
+               for_each_vcpu_mask(vcpu, entry->vcpu_dirty_mask) {
+                       v = d->vcpu[vcpu];
+                       if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
+                               continue;
+
+                       /* Invalidate VHPT entries.  */
+                       vcpu_flush_vhpt_range(v, vaddr, PAGE_SIZE);
+               }
+       } else {
+               for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
+                       /* Invalidate VHPT entries.  */
+                       cpu_flush_vhpt_range(cpu, vaddr, PAGE_SIZE);
+               }
+       }
+       /* ptc.ga has release semantics. */
+
+       /* ptc.ga  */
+       ia64_global_tlb_purge(vaddr, vaddr + PAGE_SIZE, PAGE_SHIFT);
+
+       if (swap_rr0) {
+               vcpu_set_rr(current, 0, old_rid);
+       }
+       perfc_incrc(domain_flush_vtlb_track_entry);
+}
+
+void
+domain_flush_vtlb_track_entry(struct domain* d,
+                              const struct tlb_track_entry* entry)
+{
+       domain_purge_swtc_entries_vcpu_dirty_mask(d, entry->vcpu_dirty_mask);
+       smp_mb();
+
+       __domain_flush_vtlb_track_entry(d, entry);
+}
+
+#endif
+
 static void flush_tlb_vhpt_all (struct domain *d)
 {
        /* First VHPT.  */
index 34e24e5a85c895029d6a19a0d9d21dd2d341d43b..d9f9b65c829c609a0fa675a61ee666f8ea3eb7c1 100644 (file)
 #include <asm/fpswa.h>
 #include <xen/rangeset.h>
 
-struct p2m_entry {
-    volatile pte_t*     pte;
-    pte_t               used;
-};
-
-static inline void
-p2m_entry_set(struct p2m_entry* entry, volatile pte_t* pte, pte_t used)
-{
-    entry->pte  = pte;
-    entry->used = used;
-}
-
-static inline int
-p2m_entry_retry(struct p2m_entry* entry)
-{
-    //XXX see lookup_domain_pte().
-    //    NULL is set for invalid gpaddr for the time being.
-    if (entry->pte == NULL)
-        return 0;
-
-    return (pte_val(*entry->pte) != pte_val(entry->used));
-}
+struct p2m_entry;
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+struct tlb_track;
+#endif
 
 extern void domain_relinquish_resources(struct domain *);
 struct vcpu;
@@ -140,6 +122,10 @@ struct arch_domain {
     struct last_vcpu last_vcpu[NR_CPUS];
 
     struct arch_vmx_domain arch_vmx; /* Virtual Machine Extensions */
+
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+    struct tlb_track*   tlb_track;
+#endif
 };
 #define INT_ENABLE_OFFSET(v)             \
     (sizeof(vcpu_info_t) * (v)->vcpu_id + \
index 30adb093389f05208ec7976c0ef54552a608acbe..7b23205f8507ee86994028751384f22ccf4c9ce2 100644 (file)
 #define _PAGE_VIRT_D           (__IA64_UL(1) << 53)    /* Virtual dirty bit */
 #define _PAGE_PROTNONE         0
 
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+#define _PAGE_TLB_TRACKING_BIT          54
+#define _PAGE_TLB_INSERTED_BIT          55
+#define _PAGE_TLB_INSERTED_MANY_BIT     56
+
+#define _PAGE_TLB_TRACKING              (1UL << _PAGE_TLB_TRACKING_BIT)
+#define _PAGE_TLB_INSERTED              (1UL << _PAGE_TLB_INSERTED_BIT)
+#define _PAGE_TLB_INSERTED_MANY         (1UL << _PAGE_TLB_INSERTED_MANY_BIT)
+#define _PAGE_TLB_TRACK_MASK            (_PAGE_TLB_TRACKING |          \
+                                         _PAGE_TLB_INSERTED |          \
+                                         _PAGE_TLB_INSERTED_MANY)
+
+#define pte_tlb_tracking(pte)                          \
+    ((pte_val(pte) & _PAGE_TLB_TRACKING) != 0)
+#define pte_tlb_inserted(pte)                          \
+    ((pte_val(pte) & _PAGE_TLB_INSERTED) != 0)
+#define pte_tlb_inserted_many(pte)                     \
+    ((pte_val(pte) & _PAGE_TLB_INSERTED_MANY) != 0)
+#endif // CONFIG_XEN_IA64_TLB_TRACK
+
 /* domVTI */
 #define GPFN_MEM               (0UL << 60)     /* Guest pfn is normal mem */
 #define GPFN_FRAME_BUFFER      (1UL << 60)     /* VGA framebuffer */
diff --git a/xen/include/asm-ia64/p2m_entry.h b/xen/include/asm-ia64/p2m_entry.h
new file mode 100644 (file)
index 0000000..4a2ff7e
--- /dev/null
@@ -0,0 +1,76 @@
+/******************************************************************************
+ * p2m_entry.h
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef __ASM_P2M_ENTRY_H__
+#define __ASM_P2M_ENTRY_H__
+
+#include <asm/pgtable.h>
+
+struct p2m_entry {
+#define P2M_PTE_ALWAYS_RETRY   ((volatile pte_t*) -1)
+    volatile pte_t*     ptep;
+    pte_t               used;
+};
+
+static inline void
+p2m_entry_set(struct p2m_entry* entry, volatile pte_t* ptep, pte_t used)
+{
+    entry->ptep = ptep;
+    entry->used = used;
+}
+
+static inline void
+p2m_entry_set_retry(struct p2m_entry* entry)
+{
+    entry->ptep = P2M_PTE_ALWAYS_RETRY;
+}
+
+static inline int
+p2m_entry_retry(struct p2m_entry* entry)
+{
+    /* XXX see lookup_domain_pte().
+       NULL is set for invalid gpaddr for the time being. */
+    if (entry->ptep == NULL)
+        return 0;
+
+    if (entry->ptep == P2M_PTE_ALWAYS_RETRY)
+        return 1;
+
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+    return ((pte_val(*entry->ptep) & ~_PAGE_TLB_TRACK_MASK) !=
+            (pte_val(entry->used) & ~_PAGE_TLB_TRACK_MASK));
+#else
+    return (pte_val(*entry->ptep) != pte_val(entry->used));
+#endif
+}
+
+#endif // __ASM_P2M_ENTRY_H__
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
index 741a3d7ab8d044e0d5fe18ec89454257c28514b4..95910556ba7663b8f8e4404a3a0cac50611f32a5 100644 (file)
@@ -109,9 +109,12 @@ PERFPRIVOPADDR(thash)
 #endif
 
 // vhpt.c
+PERFCOUNTER_CPU(local_vhpt_flush,               "local_vhpt_flush")
+PERFCOUNTER_CPU(vcpu_vhpt_flush,                "vcpu_vhpt_flush")
 PERFCOUNTER_CPU(vcpu_flush_vtlb_all,            "vcpu_flush_vtlb_all")
 PERFCOUNTER_CPU(domain_flush_vtlb_all,          "domain_flush_vtlb_all")
 PERFCOUNTER_CPU(vcpu_flush_tlb_vhpt_range,      "vcpu_flush_tlb_vhpt_range")
+PERFCOUNTER_CPU(domain_flush_vtlb_track_entry,  "domain_flush_vtlb_track_entry")
 PERFCOUNTER_CPU(domain_flush_vtlb_range,        "domain_flush_vtlb_range")
 
 // domain.c
@@ -134,3 +137,30 @@ PERFCOUNTER_CPU(domain_page_flush,              "domain_page_flush")
 // dom0vp
 PERFCOUNTER_CPU(dom0vp_phystomach,              "dom0vp_phystomach")
 PERFCOUNTER_CPU(dom0vp_machtophys,              "dom0vp_machtophys")
+
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+// insert or dirty
+PERFCOUNTER_CPU(tlb_track_iod,                  "tlb_track_iod")
+PERFCOUNTER_CPU(tlb_track_iod_again,            "tlb_track_iod_again")
+PERFCOUNTER_CPU(tlb_track_iod_not_tracked,      "tlb_track_iod_not_tracked")
+PERFCOUNTER_CPU(tlb_track_iod_force_many,       "tlb_track_iod_force_many")
+PERFCOUNTER_CPU(tlb_track_iod_tracked_many,     "tlb_track_iod_tracked_many")
+PERFCOUNTER_CPU(tlb_track_iod_tracked_many_del, "tlb_track_iod_tracked_many_del")
+PERFCOUNTER_CPU(tlb_track_iod_found,            "tlb_track_iod_found")
+PERFCOUNTER_CPU(tlb_track_iod_new_entry,        "tlb_track_iod_new_entry")
+PERFCOUNTER_CPU(tlb_track_iod_new_failed,       "tlb_track_iod_new_failed")
+PERFCOUNTER_CPU(tlb_track_iod_new_many,         "tlb_track_iod_new_many")
+PERFCOUNTER_CPU(tlb_track_iod_insert,           "tlb_track_iod_insert")
+PERFCOUNTER_CPU(tlb_track_iod_dirtied,          "tlb_track_iod_dirtied")
+
+// search and remove
+PERFCOUNTER_CPU(tlb_track_sar,                  "tlb_track_sar")
+PERFCOUNTER_CPU(tlb_track_sar_not_tracked,      "tlb_track_sar_not_tracked")
+PERFCOUNTER_CPU(tlb_track_sar_not_found,        "tlb_track_sar_not_found")
+PERFCOUNTER_CPU(tlb_track_sar_found,            "tlb_track_sar_found")
+PERFCOUNTER_CPU(tlb_track_sar_many,             "tlb_track_sar_many")
+
+// flush
+PERFCOUNTER_CPU(tlb_track_use_rr7,              "tlb_track_use_rr7")
+PERFCOUNTER_CPU(tlb_track_swap_rr0,             "tlb_track_swap_rr0")
+#endif
diff --git a/xen/include/asm-ia64/tlb_track.h b/xen/include/asm-ia64/tlb_track.h
new file mode 100644 (file)
index 0000000..32f6e74
--- /dev/null
@@ -0,0 +1,152 @@
+/******************************************************************************
+ * tlb_track.h
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef __TLB_TRACK_H__
+#define __TLB_TRACK_H__
+
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <asm/domain.h>
+#include <xen/list.h>
+#include <asm/p2m_entry.h>
+#include <asm/vcpumask.h>
+
+// TODO: compact this structure.
+struct tlb_track_entry {
+    struct list_head   list;
+
+    volatile pte_t*     ptep;           // corresponding p2m entry
+
+    /* XXX should we use TR_ENTRY? */
+    pte_t               pte_val;        // mfn and other flags
+                                        // pte_val.p = 1:
+                                        //   tlb entry is inserted.
+                                        // pte_val.p = 0:
+                                        //   a tlb entry was once inserted,
+                                        //   so this entry was created. But
+                                        //   a tlb purge has been issued, so
+                                        //   this virtual address need not
+                                        //   be purged.
+    unsigned long       vaddr;          // virtual address
+    unsigned long       rid;            // rid
+
+    cpumask_t           pcpu_dirty_mask;
+    vcpumask_t          vcpu_dirty_mask;
+    // tlbflush_timestamp;
+
+#ifdef CONFIG_TLB_TRACK_CNT
+#define TLB_TRACK_CNT_FORCE_MANY        256 /* XXX how many? */
+    unsigned long       cnt;
+#endif
+};
+
+struct tlb_track {
+
+/* see __gnttab_map_grant_ref()
+   A domain can map granted-page up to MAPTRACK_MAX_ENTRIES pages. */
+#define TLB_TRACK_LIMIT_ENTRIES                                     \
+    (MAPTRACK_MAX_ENTRIES * (PAGE_SIZE / sizeof(struct tlb_track)))
+
+    spinlock_t                  free_list_lock;
+    struct list_head            free_list;
+    unsigned int                limit;
+    unsigned int                num_entries;
+    unsigned int                num_free;
+    struct list_head            page_list;
+
+    /* XXX hash table size */
+    spinlock_t                  hash_lock;
+    unsigned int                hash_size;
+    unsigned int                hash_shift;
+    unsigned int                hash_mask;
+    struct list_head*           hash;
+};
+
+int tlb_track_create(struct domain* d);
+void tlb_track_destroy(struct domain* d);
+
+void tlb_track_free_entry(struct tlb_track* tlb_track,
+                          struct tlb_track_entry* entry);
+
+void
+__vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr,
+                                 struct p2m_entry* entry);
+static inline void
+vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr,
+                               struct p2m_entry* entry)
+{
+    /* optimization.
+       non-tracking pte is most common. */
+    perfc_incrc(tlb_track_iod);
+    if (!pte_tlb_tracking(entry->used)) {
+        perfc_incrc(tlb_track_iod_not_tracked);
+        return;
+    }
+
+    __vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry);
+}
+
+
+/* return value
+ * NULL if this entry is used
+ * entry if this entry isn't used
+ */
+enum TLB_TRACK_RET {
+    TLB_TRACK_NOT_TRACKED,
+    TLB_TRACK_NOT_FOUND,
+    TLB_TRACK_FOUND,
+    TLB_TRACK_MANY,
+    TLB_TRACK_AGAIN,
+};
+typedef enum TLB_TRACK_RET TLB_TRACK_RET_T;
+
+TLB_TRACK_RET_T
+tlb_track_search_and_remove(struct tlb_track* tlb_track, 
+                            volatile pte_t* ptep, pte_t old_pte, 
+                            struct tlb_track_entry** entryp);
+
+void
+__tlb_track_entry_printf(const char* func, int line,
+                         const struct tlb_track_entry* entry);
+#define tlb_track_entry_printf(entry)                       \
+    __tlb_track_entry_printf(__func__, __LINE__, (entry))
+#else
+
+#define tlb_track_create(d)                                (0)
+#define tlb_track_destroy(d)                               do { } while (0)
+#define vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry) do { } while (0)
+
+#endif /* CONFIG_XEN_IA64_TLB_TRACK */
+
+#endif /* __TLB_TRACK_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
index 22abb53f3fc5966771c0b3aae9b4118ebe519132..6b3826ccbb2ed0666f99977d9ae08bef2ca8b859 100644 (file)
@@ -22,6 +22,15 @@ void domain_flush_vtlb_all (void);
 /* Global range-flush of vTLB.  */
 void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range);
 
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+struct tlb_track_entry;
+void __domain_flush_vtlb_track_entry(struct domain* d,
+                                     const struct tlb_track_entry* entry);
+/* Global entry-flush of vTLB */
+void domain_flush_vtlb_track_entry(struct domain* d,
+                                   const struct tlb_track_entry* entry);
+#endif
+
 /* Flush vhpt and mTLB on every dirty cpus.  */
 void domain_flush_tlb_vhpt(struct domain *d);
 
index 7bfc76f7615da65f1f42785612ce865c08c31e40..843138a510a2102aa769a9eb164a5540cdf120b2 100644 (file)
@@ -161,7 +161,8 @@ extern void vcpu_poke_timer(VCPU *vcpu);
 extern void vcpu_set_next_timer(VCPU *vcpu);
 extern BOOLEAN vcpu_timer_expired(VCPU *vcpu);
 extern UINT64 vcpu_deliverable_interrupts(VCPU *vcpu);
-extern void vcpu_itc_no_srlz(VCPU *vcpu, UINT64, UINT64, UINT64, UINT64, UINT64);
+struct p2m_entry;
+extern void vcpu_itc_no_srlz(VCPU *vcpu, UINT64, UINT64, UINT64, UINT64, UINT64, struct p2m_entry*);
 extern UINT64 vcpu_get_tmp(VCPU *, UINT64);
 extern void vcpu_set_tmp(VCPU *, UINT64, UINT64);
 
diff --git a/xen/include/asm-ia64/vcpumask.h b/xen/include/asm-ia64/vcpumask.h
new file mode 100644 (file)
index 0000000..7a9773e
--- /dev/null
@@ -0,0 +1,60 @@
+#ifndef __XEN_VCPUMASK_H
+#define __XEN_VCPUMASK_H
+
+/* vcpu mask
+   stolen from cpumask.h */
+typedef struct { DECLARE_BITMAP(bits, MAX_VIRT_CPUS); } vcpumask_t;
+
+#define vcpu_set(vcpu, dst) __vcpu_set((vcpu), &(dst))
+static inline void __vcpu_set(int vcpu, volatile vcpumask_t *dstp)
+{
+    set_bit(vcpu, dstp->bits);
+}
+#define vcpus_clear(dst) __vcpus_clear(&(dst), MAX_VIRT_CPUS)
+static inline void __vcpus_clear(vcpumask_t *dstp, int nbits)
+{
+    bitmap_zero(dstp->bits, nbits);
+}
+/* No static inline type checking - see Subtlety (1) in xen/include/xen/cpumask.h. */
+#define vcpu_isset(vcpu, vcpumask) test_bit((vcpu), (vcpumask).bits)
+
+#define first_vcpu(src) __first_vcpu(&(src), MAX_VIRT_CPUS)
+static inline int __first_vcpu(const vcpumask_t *srcp, int nbits)
+{
+    return min_t(int, nbits, find_first_bit(srcp->bits, nbits));
+}
+
+#define next_vcpu(n, src) __next_vcpu((n), &(src), MAX_VIRT_CPUS)
+static inline int __next_vcpu(int n, const vcpumask_t *srcp, int nbits)
+{
+    return min_t(int, nbits, find_next_bit(srcp->bits, nbits, n+1));
+}
+
+#if MAX_VIRT_CPUS > 1
+#define for_each_vcpu_mask(vcpu, mask)          \
+    for ((vcpu) = first_vcpu(mask);             \
+         (vcpu) < MAX_VIRT_CPUS;                \
+         (vcpu) = next_vcpu((vcpu), (mask)))
+#else /* NR_CPUS == 1 */
+#define for_each_vcpu_mask(vcpu, mask) for ((vcpu) = 0; (vcpu) < 1; (vcpu)++)
+#endif /* NR_CPUS */
+
+#define vcpumask_scnprintf(buf, len, src) \
+        __vcpumask_scnprintf((buf), (len), &(src), MAX_VIRT_CPUS)
+static inline int __vcpumask_scnprintf(char *buf, int len,
+                                       const vcpumask_t *srcp, int nbits)
+{
+    return bitmap_scnprintf(buf, len, srcp->bits, nbits);
+}
+
+#endif /* __XEN_VCPUMASK_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
index c59d8fd6351c1a469dda7de0b1ebcdf4e030cfef..d63a8d09b911611325318e4332d6278557b1c823 100644 (file)
 
 #ifndef __ASSEMBLY__
 #include <xen/percpu.h>
+#include <asm/vcpumask.h>
+
+extern void domain_purge_swtc_entries(struct domain *d);
+extern void domain_purge_swtc_entries_vcpu_dirty_mask(struct domain* d, vcpumask_t vcpu_dirty_mask);
 
 //
 // VHPT Long Format Entry (as recognized by hw)
index d87ca681e7420028d761da5a871a4809742107b8..fd05ff92334fdc00813e36cf5ae38ab8f83139e3 100644 (file)
@@ -358,6 +358,9 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
 /* Internal only: memory attribute must be WC/UC/UCE.  */
 #define _ASSIGN_nocache                 1
 #define ASSIGN_nocache                  (1UL << _ASSIGN_nocache)
+// tlb tracking
+#define _ASSIGN_tlb_track               2
+#define ASSIGN_tlb_track                (1UL << _ASSIGN_tlb_track)
 
 /* This structure has the same layout of struct ia64_boot_param, defined in
    <asm/system.h>.  It is redefined here to ease use.  */